clear all
*import dataset
import excel "/Users/mrjensen/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/Research Projects/A&S/final_union_joined.xlsx", sheet("Sheet1") firstrow clear

gen state1 = real(trim(State))

*US Totals
drop if state1 == 0 
*Nebraska 
drop if state1 == 28 

xtset state1 year, yearly

********************************************************************************
*Key Union Variables - Raw data from CPS - merged outgoing roation groups 
********************************************************************************
sum _union_mem 
sum _union_tot

*Union coverage - 3 & 5 
generate _union3 = (_union_tot + l1._union_tot + l2._union_tot) / 3
generate _union5 = (_union_tot + l1._union_tot + l2._union_tot + l3._union_tot + l4._union_tot) / 5


cor _union3 _union5
ttest _union3 == _union5 
sdtest _union3 == _union5 

gen union_av5 = _union5*100
gen union_av3 = _union3*100

*transform and describe 
describe tot_stgov_emp
sum tot_stgov_emp
gen st_emp = tot_stgov_emp

*Union membership - 3&5
generate _unionmem3 = (_union_mem  + l1._union_mem  + l2._union_mem ) / 3
generate _unionmem5 = (_union_mem  + l1._union_mem  + l2._union_mem  + l3._union_mem  + l4._union_mem ) / 5

cor _unionmem3 _unionmem5
ttest _unionmem3 == _unionmem5 
sdtest _unionmem3 == _unionmem5

gen unionmem_av5 = _unionmem5*100
gen unionmem_av3 = _unionmem3*100

sum unionmem_av5

*DV - Inflation adjusted payrolls ***************************************************
gen lnpayrolls = ln(payrolls2018)
gen lnpayrolls_cur = ln(totPayrollcur + 1)
gen payrollper = payrolls2018/fte1
gen payrollper_cur = totPayrollcur/fte1
gen pay_diff_per1 = ln(payrollper_cur)-ln(payrollper)
gen pay_diff = lnpayrolls_cur - lnpayrolls

gen dfte = fte1 - L.fte1

gen fte_per = fte1/(pop/100000)
gen full_per = FullTime/(pop/100000)
gen part_per = PartTime88/(pop/100000)
gen tot_per = st_emp/(pop/100000)

sum fte1 FullTime PartTime88 st_emp

*control variables ***************************************************
gen id=_n

gen unified_dem = unifed_dem

gen lnpop = ln(pop)

gen oopchg = ((pop-l1.pop)/l1.pop)*100

replace rdf_real = 0 if rdf_real < 0

gen lnrdf = ln(rdf_real)
generate _rdf_3 = (rdf_real + l1.rdf_real+ l2.rdf_real) / 3
generate _rdf_5 = (rdf_real + l1.rdf_real+ l2.rdf_real + l3.rdf_real + l4.rdf_real) / 5

*GDP per capitia 
gen gdpfull = gdp_real * 1000000

fsum gdpfull, format(%15.0f)

fsum pop, format(%15.0f)

fsum fte if year >=2000, format(%15.0f)

gen gdpcapita = gdpfull/pop 

sum gdpcapita, detail

gen gdpcap_change = ((gdpcapita-l1.gdpcapita)/l1.gdpcapita)*100

extremes union_av3 if year <= 2010, n(10)
extremes unionmem_av3 if year <= 2010, n(10)

gen actual_revs = total_rev * 1000000 
fsum actual_revs, format(%15.0f)
gen payroll_share2 = payrolls2018 / actual_revs 
sum payroll_share2
extremes actual_revs, n(10)
gen dfte_per = fte_per - L.fte_per
gen fte_growth = (fte1 - L.fte1) / L.fte1
gen dpt = part_per - L.part_per

* Standardize payroll per FTE by cost of living
gen payroll_adj_col = payrollper / (rpp/100)

* Or log version
gen ln_payroll_adj_col = ln(payrollper) - ln(rpp/100)


/* code for figure 1 and Figures A-E
label define statelab ///
    1  "Alabama" ///
    2  "Alaska" ///
    3  "Arizona" ///
    4  "Arkansas" ///
    5  "California" ///
    6  "Colorado" ///
    7  "Connecticut" ///
    8 "Delaware" ///
    10 "Florida" ///
    11 "Georgia" ///
    12 "Hawaii" ///
    13 "Idaho" ///
    14 "Illinois" ///
    15 "Indiana" ///
    16 "Iowa" ///
    17 "Kansas" ///
    18 "Kentucky" ///
    19 "Louisiana" ///
    20 "Maine" ///
    21 "Maryland" ///
    22 "Massachusetts" ///
    23 "Michigan" ///
    24 "Minnesota" ///
    25 "Mississippi" ///
    26 "Missouri" ///
    27 "Montana" ///
    29 "Nevada" ///
    30 "New Hampshire" ///
    31 "New Jersey" ///
    32 "New Mexico" ///
    33 "New York" ///
    34 "North Carolina" ///
    35 "North Dakota" ///
    36 "Ohio" ///
    37 "Oklahoma" ///
    38 "Oregon" ///
    39 "Pennsylvania" ///
    40 "Rhode Island" ///
    41 "South Carolina" ///
    42 "South Dakota" ///
    43 "Tennessee" ///
    44 "Texas" ///
    45 "Utah" ///
    46 "Vermont" ///
    47 "Virginia" ///
	48 "Washington" ///
	49 "West Virginia" ///
	50 "Wisconsin" ///
	51 "Wyoming" ///
	
label values state1 statelab

*tab state1 year, sum(unionmem_av5)

*statsby mean = r(mean) sd = r(sd), ///
    by(state1 year): summarize unionmem_av5

* 1. State × year means
bysort state year: egen mean_unionmem_av5 = mean(unionmem_av5)
bysort state year: keep if _n == 1
* Mean across all states for each year
bysort year: egen mean_allstates = mean(unionmem_av5)

* 2. Declare panel
xtset state year

set scheme s2color
set scheme plotplain

*set scheme plottig


* 3a. One big spaghetti plot (all states over time)
xtline mean_unionmem_av5, overlay ///
    ytitle("Union membership (%)") ///
    xtitle("Year")

* 3a. 50 plots with US Average trend line (all states over time)
twoway ///
    (line mean_unionmem_av5 year, ///
        by(state, legend(off)) lcolor(gs8)) ///
    (line mean_allstates year, ///
        lcolor(black) lwidth(thick)), ///
    ytitle("Union Membership (%)") ///
    xtitle("Year") ///
    legend(order(2 "All-state mean"))


*plot union membership in 10 state clusters - Appendix plot
import excel "/Users/mrjensen/Downloads/union.xls", sheet("Sheet1") firstrow clear

local yr = 1987
foreach v of varlist B-AG {
    rename `v' y`yr'
    local yr = `yr' + 1
}

reshape long y, i(State) j(year)

xtset State year 
gen y1 = real(y)
xtline y1, overlay

* 2. Keep one row per state1–year
bysort state year: keep if _n == 1

* 3. Drop missing years (reshape j() can't be missing)
drop if missing(year)

* 4. Create "mean (sd)" cell
gen cell = string(mean_unionmem_av5, "%6.2f")

* 5. We only need state1, year, cell for the reshape
keep state year cell

* 6. Reshape to wide: one row per state, one column per year
reshape wide cell, i(state) j(year)
rename cell* y*

* States 1–10
xtline y1 if inrange(State, 1, 10), overlay ///
    title("States 1–10")
	
* States 11–20
xtline y1 if inrange(State, 11, 20), overlay ///
    title("States 11–20")

* States 21–30
xtline y1 if inrange(State, 21, 30), overlay ///
    title("States 21–30")

* States 31–40
xtline y1 if inrange(State, 31, 40), overlay ///
    title("States 31–40")

* States 41–51
xtline y1 if inrange(State, 41, 51), overlay ///
    title("States 41–51")
*/

*summary statistics

xtsum fte1 if asample2 == 1
xtsum PartTime if asample2 == 1 
xtsum FullTime if asample2 == 1 
xtsum payrollper if asample2 == 1
xtsum lnpayrolls if asample2 == 1
xtsum unionmem_av5 if asample2 == 1
xtsum unified_dem if asample2 == 1
xtsum divided_gov if asample2 == 1
xtsum gdpcapita if asample2 == 1
xtsum pov_rt if asample2 == 1
xtsum rdf_real if asample2 == 1
xtsum med_inc_18 if asample2 == 1
xtsum pop if asample2 == 1
xtsum righttowork if asample2 == 1 

*Set sample/wage models
xtreg payrollper i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year, fe cluster(state1)
est sto m2 
*gen asample2 = e(sample)
*figure 2
margins, at(l5.unionmem_av5=(0(10)100)) over(unified_dem) 
marginsplot, recast(line)  ytitle("Payroll Spending per Employee 1992-2010", axis(1)) xtitle("Union Membership", axis(1)) title(" ") addplot (hist union_av3 if year < 2011 & year >= 2001, discrete yaxis(2) yscale(alt lcolor() axis(2)) ylabel(0 " " 200000 " " 4000000 " " 6000000000 " " 800000000 " " 100000000000 " ", labcolor() axis(2) tlcolor(black) tlwidth(thin) labsize(smal) tl(0)) ytitle(" ", axis(2)) legend(pos(5) ring (0) col(1)))

xtreg payrollper l1.unified_dem l5.unionmem_av5  l1.divided_gov l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2==1, fe cluster(state1)
est sto m1

xtreg lnpayrolls i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2==1, fe cluster(state1) //coeflegend
est sto m4

xtreg lnpayrolls l1.unified_dem l5.unionmem_av5  l1.divided_gov l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2==1, fe cluster(state1) //coeflegend
est sto m3

*Employment Models
xtreg fte_per i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m31 

xtreg part_per i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m33

xtreg fte1 i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m32 

xtreg PartTime88 i.l1.unified_dem##c.l5.unionmem_av5  i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m34

*Appendix tables
xtreg pay_diff i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m1

sum pay_diff_per

xtreg dfte i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m1

xtreg dfte l1.unified_dem l1.divided_gov l5.union_mem_5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m2

xtreg dpt i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m3

xtreg dpt l1.unified_dem l1.divided_gov l5.union_mem_5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m4

xtreg dfte_per  l1.unified_dem l1.divided_gov l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m3

xtreg dfte_per  l1.unified_dem l1.divided_gov l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m3

xtreg lnpayrolls i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year if asample2 == 1, fe cluster(state1)

est sto m4

xtreg fte_per i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year, fe cluster(state1)

est sto m5

xtreg fte1 i.l1.unified_dem##c.l5.unionmem_av5 i.l1.divided_gov##c.l5.unionmem_av5 l1.gdpcapita l1.pov_rt l1.lnrdf l1.lnpop l1.med_inc_18 l1.righttowork i.year, fe cluster(state1)

est sto m6

*Table output
esttab m9 m11 m13 m15 using results.rtf, replace cells(b(star fmt(3)) se(par fmt(3))) starlevels( ^ 0.10 * 0.05 ** 0.010 *** 0.001) legend label varlabels(_cons Constant lnpayrolls Ln(Payroll) payrollper Pay_per_Worker l5.unionmem_av3 Union_Membership unified_dem Unified_Dem unifed_dem Unified_Dem divided_gov Divided_gov gdpcapita GDP_per_capita pov_rt Poverty_Rate lnrdf Ln(RDF) lnpop Population_change med_inc_18 Median_income) stats(N F r2 r2_a aic bic, labels("No. of Obs." "F Statistic" "R-Squared" "Adj. R-Squared" "AIC" "BIC") star(F)) addnote("Robust Standard Errors in Parentheses")
	
